library(tidyverse)## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.0
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(DT)
library(plotly)##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(scales)##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggeasy)

# Load the NEON MAG table and derive the assembly type, one column per
# taxonomic rank, and site / sample identifiers from the free-text columns.
NEON_MAGs <- read_csv("data/GOLD_Study_ID_Gs0161344_NEON_edArchaea.csv") %>%
  # Drop columns that are not needed for the analysis.
  select(-c(`GOLD Study ID`, `Bin Methods`, `Created By`, `Date Added`)) %>%
  # Label each row "Combined" (the NEON combined assembly) or "Individual".
  mutate(
    `Assembly Type` = case_when(
      `Genome Name` == "NEON combined assembly" ~ "Combined",
      TRUE ~ "Individual"
    )
  ) %>%
  # Split the GTDB-Tk lineage at "; " into one column per rank; the original
  # lineage column is kept (remove = FALSE).
  separate(
    `GTDB-Tk Taxonomy Lineage`,
    c("Domain", "Phylum", "Class", "Order", "Family", "Genus"),
    "; ",
    remove = FALSE
  ) %>%
  # Strip the common prefix "Terrestrial soil microbial communities from ".
  mutate(
    `Genome Name` = str_replace(
      `Genome Name`, "Terrestrial soil microbial communities from ", ""
    )
  ) %>%
  # Split at " - " into the site description and the sample name.
  separate(`Genome Name`, c("Site", "Sample Name"), " - ") %>%
  # Strip the "-comp-1" marker from the sample name.
  mutate(`Sample Name` = str_replace(`Sample Name`, "-comp-1", "")) %>%
  # Split the sample name at "_" into the site ID and the plot information,
  # keeping the full sample name as well.
  separate(
    `Sample Name`, c("Site ID", "subplot.layer.date"), "_",
    remove = FALSE
  ) %>%
  # Split the plot information at "-" into three columns.
  separate(`subplot.layer.date`, c("Subplot", "Layer", "Date"), "-") ## Rows: 1754 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Bin ID, Genome Name, Bin Quality, Bin Lineage, GTDB-Tk Taxonomy L...
## dbl (10): IMG Genome ID, Bin Completeness, Bin Contamination, Total Number ...
## date (1): Date Added
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 6 pieces. Additional pieces discarded in 46 rows [3, 4, 24, 25, 26,
## 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 54, 232, 267, ...].
## Warning: Expected 6 pieces. Missing pieces filled with `NA` in 446 rows [1, 2, 9, 10,
## 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 46, 50, 53, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 624 rows [4, 7, 8, 236,
## 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
## ...].
# Load the IMG metagenome export, shorten the name column to `Genome Name`,
# and drop rows whose name contains "re-annotation" or "WREF plot".
NEON_metagenomes <- read_tsv("data/exported_img_data.tsv") %>%
  rename(`Genome Name` = `Genome Name / Sample Name`) %>%
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  filter(str_detect(`Genome Name`, "re-annotation", negate = TRUE)) %>%
  filter(str_detect(`Genome Name`, "WREF plot", negate = TRUE)) ## Rows: 176 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (13): Domain, Sequencing Status, Study Name, Genome Name / Sample Name, ...
## dbl (4): taxon_oid, IMG Genome ID, Genome Size * assembled, Gene Count * ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Derive site / sample identifiers for the metagenomes with the same steps
# that are applied to the MAG table.
NEON_metagenomes <- NEON_metagenomes %>%
  # Strip the common prefix "Terrestrial soil microbial communities from ".
  mutate(
    `Genome Name` = str_replace(
      `Genome Name`, "Terrestrial soil microbial communities from ", ""
    )
  ) %>%
  # Split at " - " into the site description and the sample name.
  separate(`Genome Name`, c("Site", "Sample Name"), " - ") %>%
  # Strip the "-comp-1" marker from the sample name.
  mutate(`Sample Name` = str_replace(`Sample Name`, "-comp-1", "")) %>%
  # Split the sample name at "_" into the site ID and the plot information,
  # keeping the full sample name as well.
  separate(
    `Sample Name`, c("Site ID", "subplot.layer.date"), "_",
    remove = FALSE
  ) %>%
  # Split the plot information at "-" into three columns.
  separate(`subplot.layer.date`, c("Subplot", "Layer", "Date"), "-") ## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [52].
# Load the soil chemistry table and strip the "-COMP" marker from
# genomicsSampleID.
NEON_chemistry <- read_tsv("data/neon_chem.tsv") %>%
  mutate(genomicsSampleID = str_replace(genomicsSampleID, "-COMP", "")) ## Rows: 87 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): genomicsSampleID, siteID, plotID, nlcdClass, horizon
## dbl (11): decimalLatitude, decimalLongitude, elevation, soilTemp, d15N, org...
## date (1): collectionDate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the chemistry file again without any clean-up (genomicsSampleID keeps
# its "-COMP" suffix) and render it as a table. The assignment is a separate
# statement so it is not hidden inside the kable() call.
NEON_chemistry_description <- read_tsv("data/neon_chem.tsv")
kable(NEON_chemistry_description)## Rows: 87 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): genomicsSampleID, siteID, plotID, nlcdClass, horizon
## dbl (11): decimalLatitude, decimalLongitude, elevation, soilTemp, d15N, org...
## date (1): collectionDate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
| genomicsSampleID | siteID | plotID | nlcdClass | decimalLatitude | decimalLongitude | elevation | collectionDate | horizon | soilTemp | d15N | organicd13C | nitrogenPercent | organicCPercent | CNratio | soilInWaterpH | soilInCaClpH |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GUAN_048-M-20210920-COMP | GUAN | GUAN_048 | evergreenForest | 17.96911 | -66.86428 | 130.1 | 2021-09-20 | M | 27.433333 | NA | NA | NA | NA | NA | 7.676414 | 7.050993 |
| GUAN_042-M-20210920-COMP | GUAN | GUAN_042 | evergreenForest | 17.97073 | -66.86397 | 144.8 | 2021-09-20 | M | 28.100000 | NA | NA | NA | NA | NA | 7.629825 | 7.308130 |
| GUAN_043-M-20210921-COMP | GUAN | GUAN_043 | evergreenForest | 17.96887 | -66.86768 | 113.5 | 2021-09-21 | M | 28.533333 | NA | NA | NA | NA | NA | 7.715340 | 7.373411 |
| GUAN_007-M-20210922-COMP | GUAN | GUAN_007 | evergreenForest | 17.97283 | -66.85771 | 172.4 | 2021-09-22 | M | 26.333333 | NA | NA | NA | NA | NA | 7.904983 | 7.424066 |
| GUAN_004-M-20210922-COMP | GUAN | GUAN_004 | evergreenForest | 17.96925 | -66.85267 | 131.1 | 2021-09-22 | M | 28.400000 | NA | NA | NA | NA | NA | 7.749693 | 7.279275 |
| GUAN_003-M-20210922-COMP | GUAN | GUAN_003 | evergreenForest | 17.97314 | -66.86170 | 181.0 | 2021-09-22 | M | 26.900000 | NA | NA | NA | NA | NA | 7.655891 | 7.278372 |
| GUAN_006-M-20210922-COMP | GUAN | GUAN_006 | evergreenForest | 17.96382 | -66.87567 | 98.9 | 2021-09-22 | M | 28.733333 | NA | NA | NA | NA | NA | 7.737007 | 7.262784 |
| KONZ_024-M-20210719-COMP | KONZ | KONZ_024 | deciduousForest | 39.11090 | -96.55221 | 351.0 | 2021-07-19 | M | 20.466667 | NA | NA | NA | NA | NA | 7.613580 | 7.183660 |
| KONZ_042-M-20210720-COMP | KONZ | KONZ_042 | grasslandHerbaceous | 39.09957 | -96.56440 | 401.2 | 2021-07-20 | M | 20.866667 | NA | NA | NA | NA | NA | 6.889637 | 6.230431 |
| KONZ_046-M-20210720-COMP | KONZ | KONZ_046 | shrubScrub | 39.10307 | -96.56392 | 405.7 | 2021-07-20 | M | 21.400000 | NA | NA | NA | NA | NA | 6.952601 | 6.420308 |
| KONZ_043-M-20210721-COMP | KONZ | KONZ_043 | grasslandHerbaceous | 39.10219 | -96.56118 | 405.4 | 2021-07-21 | M | 20.766667 | NA | NA | NA | NA | NA | 6.751302 | 5.941326 |
| KONZ_045-M-20210721-COMP | KONZ | KONZ_045 | grasslandHerbaceous | 39.10383 | -96.56181 | 392.1 | 2021-07-21 | M | 23.566667 | NA | NA | NA | NA | NA | 7.385327 | 6.786469 |
| WOOD_003-M-20210708-COMP | WOOD | WOOD_003 | grasslandHerbaceous | 47.11858 | -99.23994 | 585.7 | 2021-07-08 | M | 18.500000 | 7.0333333 | -21.10000 | 0.3533333 | 4.4766667 | NA | 6.583344 | 6.354476 |
| WOOD_002-M-20210708-COMP | WOOD | WOOD_002 | grasslandHerbaceous | 47.13613 | -99.23290 | 573.6 | 2021-07-08 | M | 18.900000 | 7.8333333 | -22.03333 | 0.4133333 | 5.8866667 | NA | 7.261331 | 6.707714 |
| WOOD_005-M-20210708-COMP | WOOD | WOOD_005 | grasslandHerbaceous | 47.14934 | -99.25193 | 590.3 | 2021-07-08 | M | 21.800000 | 5.9333333 | -19.30000 | 0.2433333 | 1.0000000 | NA | 7.928605 | 7.381795 |
| WOOD_043-M-20210712-COMP | WOOD | WOOD_043 | grasslandHerbaceous | 47.13100 | -99.24276 | 580.2 | 2021-07-12 | M | 19.833333 | 7.0333333 | -21.93333 | 0.4300000 | 3.7233333 | NA | 7.238510 | 6.688311 |
| WOOD_042-M-20210712-COMP | WOOD | WOOD_042 | grasslandHerbaceous | 47.12909 | -99.24592 | 584.5 | 2021-07-12 | M | 19.466667 | 6.9000000 | -23.20000 | 0.3433333 | 3.8100000 | NA | 5.565118 | 5.204280 |
| WOOD_001-M-20210714-COMP | WOOD | WOOD_001 | grasslandHerbaceous | 47.12826 | -99.25777 | 596.3 | 2021-07-14 | M | 18.233333 | 6.9666667 | -22.30000 | 0.3733333 | 3.8633333 | NA | 7.712703 | 7.110169 |
| WOOD_004-M-20210714-COMP | WOOD | WOOD_004 | grasslandHerbaceous | 47.12584 | -99.25380 | 594.1 | 2021-07-14 | M | 18.566667 | 7.2333333 | -23.93333 | 0.3966667 | 5.3533333 | NA | 6.662668 | 6.098532 |
| WOOD_024-M-20210714-COMP | WOOD | WOOD_024 | emergentHerbaceousWetlands | 47.15117 | -99.26265 | 586.6 | 2021-07-14 | M | NA | 5.9333333 | -24.13333 | 0.6200000 | 7.9300000 | NA | 7.925237 | 7.662862 |
| WOOD_024-O-20210714-COMP | WOOD | WOOD_024 | emergentHerbaceousWetlands | 47.15117 | -99.26265 | 586.6 | 2021-07-14 | O | 14.600000 | 4.5000000 | -29.30000 | 2.7700000 | 44.0100000 | NA | 7.290000 | 6.920000 |
| CLBJ_040-M-20210503-COMP | CLBJ | CLBJ_040 | deciduousForest | 33.37882 | -97.64669 | 330.4 | 2021-05-03 | M | 18.866667 | NA | NA | NA | NA | NA | 5.507978 | 4.886460 |
| CLBJ_038-M-20210504-COMP | CLBJ | CLBJ_038 | deciduousForest | 33.41426 | -97.59710 | 281.0 | 2021-05-04 | M | 17.900000 | NA | NA | NA | NA | NA | 6.022671 | 5.207261 |
| CLBJ_032-M-20210504-COMP | CLBJ | CLBJ_032 | grasslandHerbaceous | 33.40613 | -97.59400 | 289.9 | 2021-05-04 | M | 20.133333 | NA | NA | NA | NA | NA | 5.786092 | 4.698626 |
| CLBJ_033-M-20210505-COMP | CLBJ | CLBJ_033 | deciduousForest | 33.38097 | -97.62021 | 308.6 | 2021-05-05 | M | 16.233333 | NA | NA | NA | NA | NA | 6.227408 | 5.295270 |
| CLBJ_001-M-20210506-COMP | CLBJ | CLBJ_001 | deciduousForest | 33.39799 | -97.56834 | 278.1 | 2021-05-06 | M | 18.633333 | NA | NA | NA | NA | NA | 6.169993 | 5.292947 |
| CLBJ_003-M-20210506-COMP | CLBJ | CLBJ_003 | deciduousForest | 33.40399 | -97.57274 | 273.4 | 2021-05-06 | M | 17.066667 | NA | NA | NA | NA | NA | 6.123000 | 5.225470 |
| CLBJ_002-M-20210506-COMP | CLBJ | CLBJ_002 | deciduousForest | 33.40398 | -97.57114 | 263.1 | 2021-05-06 | M | 17.400000 | NA | NA | NA | NA | NA | 6.432538 | 5.547408 |
| CLBJ_006-M-20210506-COMP | CLBJ | CLBJ_006 | deciduousForest | 33.39823 | -97.56673 | 275.8 | 2021-05-06 | M | 17.566667 | NA | NA | NA | NA | NA | 5.903526 | 4.798404 |
| YELL_046-M-20210705-COMP | YELL | YELL_046 | evergreenForest | 44.95236 | -110.54158 | 2155.4 | 2021-07-05 | M | 13.500000 | NA | NA | NA | NA | NA | 6.480904 | 5.844094 |
| YELL_051-M-20210705-COMP | YELL | YELL_051 | shrubScrub | 44.95428 | -110.54157 | 2119.7 | 2021-07-05 | M | 14.766667 | NA | NA | NA | NA | NA | 5.940702 | 5.174314 |
| YELL_002-M-20210706-COMP | YELL | YELL_002 | shrubScrub | 44.93247 | -110.63490 | 2125.1 | 2021-07-06 | M | 13.233333 | NA | NA | NA | NA | NA | 6.171958 | 5.252755 |
| YELL_009-M-20210706-COMP | YELL | YELL_009 | evergreenForest | 44.97031 | -110.50188 | 2000.3 | 2021-07-06 | M | 16.266667 | NA | NA | NA | NA | NA | 6.480409 | 5.709434 |
| YELL_048-M-20210707-COMP | YELL | YELL_048 | evergreenForest | 44.95127 | -110.53665 | 2149.0 | 2021-07-07 | M | 14.466667 | NA | NA | NA | NA | NA | 5.780632 | 4.941156 |
| YELL_016-M-20210708-COMP | YELL | YELL_016 | grasslandHerbaceous | 44.96577 | -110.58371 | 2046.8 | 2021-07-08 | M | 18.733333 | NA | NA | NA | NA | NA | 6.260325 | 5.335124 |
| YELL_012-O-20210708-COMP | YELL | YELL_012 | evergreenForest | 44.94461 | -110.43366 | 1901.5 | 2021-07-08 | O | 12.700000 | NA | NA | NA | NA | NA | 6.556216 | 5.906809 |
| YELL_003-M-20210708-COMP | YELL | YELL_003 | shrubScrub | 44.95478 | -110.53320 | 2120.8 | 2021-07-08 | M | 18.433333 | NA | NA | NA | NA | NA | 6.318361 | 5.323576 |
| YELL_005-M-20210708-COMP | YELL | YELL_005 | shrubScrub | 44.94838 | -110.63138 | 2112.0 | 2021-07-08 | M | 26.833333 | NA | NA | NA | NA | NA | 6.531477 | 5.640862 |
| NIWO_005-M-20210726-COMP | NIWO | NIWO_005 | evergreenForest | 40.04366 | -105.56990 | 3284.7 | 2021-07-26 | M | 15.166667 | NA | NA | NA | NA | NA | 5.122946 | 4.345213 |
| NIWO_004-M-20210726-COMP | NIWO | NIWO_004 | evergreenForest | 40.04306 | -105.58150 | 3312.5 | 2021-07-26 | M | 15.400000 | NA | NA | NA | NA | NA | 5.106216 | 4.184691 |
| NIWO_004-O-20210726-COMP | NIWO | NIWO_004 | evergreenForest | 40.04306 | -105.58150 | 3312.5 | 2021-07-26 | O | 11.800000 | NA | NA | NA | NA | NA | 4.400000 | 3.870000 |
| NIWO_003-M-20210727-COMP | NIWO | NIWO_003 | grasslandHerbaceous | 40.05125 | -105.56504 | 3494.9 | 2021-07-27 | M | 28.433333 | NA | NA | NA | NA | NA | 5.919518 | 5.069312 |
| NIWO_002-M-20210728-COMP | NIWO | NIWO_002 | evergreenForest | 40.04106 | -105.54704 | 3059.6 | 2021-07-28 | M | 12.700000 | NA | NA | NA | NA | NA | 4.571461 | 3.825102 |
| NIWO_001-O-20210728-COMP | NIWO | NIWO_001 | evergreenForest | 40.04234 | -105.55898 | 3213.5 | 2021-07-28 | O | 14.133333 | NA | NA | NA | NA | NA | 4.455028 | 3.727124 |
| SRER_004-M-20210809-COMP | SRER | SRER_004 | shrubScrub | 31.90678 | -110.81526 | 1044.5 | 2021-08-09 | M | 26.366667 | 6.3666667 | -18.83333 | 0.1066667 | 1.1266667 | NA | 7.543272 | 6.837816 |
| SRER_047-M-20210809-COMP | SRER | SRER_047 | shrubScrub | 31.91036 | -110.83844 | 990.6 | 2021-08-09 | M | 25.800000 | 7.3666667 | -20.76667 | 0.0500000 | 0.7300000 | NA | 8.788773 | 8.137896 |
| SRER_043-M-20210809-COMP | SRER | SRER_043 | shrubScrub | 31.91010 | -110.83718 | 993.4 | 2021-08-09 | M | 26.233333 | 5.9333333 | -21.50000 | 0.0700000 | 0.8466667 | NA | 8.659768 | 8.062928 |
| SRER_006-M-20210809-COMP | SRER | SRER_006 | shrubScrub | 31.79566 | -110.91024 | 1048.6 | 2021-08-09 | M | 26.533333 | 7.0000000 | -20.16667 | 0.0733333 | 0.6966667 | NA | 7.242389 | 6.190785 |
| SRER_053-M-20210810-COMP | SRER | SRER_053 | shrubScrub | 31.90982 | -110.83591 | 996.7 | 2021-08-10 | M | 24.466667 | 6.8666667 | -19.80000 | 0.0566667 | 0.4700000 | NA | 8.570279 | 8.002612 |
| SRER_052-M-20210810-COMP | SRER | SRER_052 | shrubScrub | 31.90953 | -110.83336 | 1002.3 | 2021-08-10 | M | 26.833333 | 6.8000000 | -20.10000 | 0.0466667 | 0.5900000 | NA | 8.723155 | 8.086253 |
| SRER_005-M-20210810-COMP | SRER | SRER_005 | shrubScrub | 31.82884 | -110.82398 | 1261.1 | 2021-08-10 | M | 27.100000 | 5.7000000 | -18.96667 | 0.1500000 | 2.3800000 | NA | 6.333651 | 5.734877 |
| ONAQ_002-M-20210524-COMP | ONAQ | ONAQ_002 | shrubScrub | 40.19332 | -112.46455 | 1688.0 | 2021-05-24 | M | 12.000000 | 7.3333333 | -23.20000 | 0.1133333 | 1.8933333 | NA | 8.651367 | 7.946743 |
| ONAQ_008-M-20210524-COMP | ONAQ | ONAQ_008 | evergreenForest | 40.15854 | -112.52157 | 1795.9 | 2021-05-24 | M | 11.333333 | 4.8333333 | -24.20000 | 0.1966667 | 1.7433333 | NA | 8.316454 | 7.660087 |
| ONAQ_004-M-20210525-COMP | ONAQ | ONAQ_004 | shrubScrub | 40.18594 | -112.47248 | 1713.7 | 2021-05-25 | M | 14.166667 | 8.0000000 | -21.86667 | 0.0866667 | 1.0533333 | NA | 9.010762 | 8.170844 |
| ONAQ_010-M-20210526-COMP | ONAQ | ONAQ_010 | evergreenForest | 40.20104 | -112.49713 | 1903.7 | 2021-05-26 | M | 13.500000 | 4.0333333 | -23.66667 | 0.2133333 | 2.2266667 | NA | 8.252551 | 7.547785 |
| ONAQ_005-M-20210527-COMP | ONAQ | ONAQ_005 | shrubScrub | 40.18077 | -112.43185 | 1614.4 | 2021-05-27 | M | 14.533333 | 8.0666667 | -19.76667 | 0.0966667 | 0.7500000 | NA | 8.595864 | 7.862242 |
| ONAQ_003-M-20210527-COMP | ONAQ | ONAQ_003 | shrubScrub | 40.20592 | -112.43028 | 1610.9 | 2021-05-27 | M | 16.333333 | 7.4000000 | -21.96667 | 0.1066667 | 1.4800000 | NA | 8.529063 | 7.894134 |
| WREF_001-O-20210621-COMP | WREF | WREF_001 | evergreenForest | 45.84403 | -121.99907 | 666.4 | 2021-06-21 | O | 13.166667 | NA | NA | NA | NA | NA | 4.293902 | 3.435722 |
| WREF_004-M-20210622-COMP | WREF | WREF_004 | evergreenForest | 45.82294 | -121.99871 | 567.8 | 2021-06-22 | M | 14.300000 | NA | NA | NA | NA | NA | 5.160000 | 4.370000 |
| WREF_004-O-20210622-COMP | WREF | WREF_004 | evergreenForest | 45.82294 | -121.99871 | 567.8 | 2021-06-22 | O | 14.200000 | NA | NA | NA | NA | NA | 4.359275 | 3.616734 |
| WREF_003-M-20210622-COMP | WREF | WREF_003 | evergreenForest | 45.83152 | -122.01861 | 602.1 | 2021-06-22 | M | 13.800000 | NA | NA | NA | NA | NA | 5.067128 | 4.314741 |
| WREF_003-O-20210622-COMP | WREF | WREF_003 | evergreenForest | 45.83152 | -122.01861 | 602.1 | 2021-06-22 | O | 14.300000 | NA | NA | NA | NA | NA | 3.970000 | 3.160000 |
| WREF_073-O-20210623-COMP | WREF | WREF_073 | evergreenForest | 45.82584 | -121.96013 | 371.8 | 2021-06-23 | O | 15.650000 | NA | NA | NA | NA | NA | 4.515045 | 3.673689 |
| WREF_073-M-20210623-COMP | WREF | WREF_073 | evergreenForest | 45.82584 | -121.96013 | 371.8 | 2021-06-23 | M | 14.700000 | NA | NA | NA | NA | NA | 4.970000 | 4.250000 |
| TEAK_043-M-20210719-COMP | TEAK | TEAK_043 | evergreenForest | 36.99970 | -119.01104 | 2141.8 | 2021-07-19 | M | 18.400000 | 1.9000000 | -24.93333 | 0.2433333 | 6.6966667 | 26.70000 | 5.475909 | 4.666921 |
| TEAK_002-O-20210720-COMP | TEAK | TEAK_002 | evergreenForest | 36.97845 | -119.03569 | 2377.1 | 2021-07-20 | O | 14.000000 | -1.4000000 | -26.00000 | 1.3600000 | 36.8600000 | NA | 6.200000 | 5.870000 |
| TEAK_003-M-20210726-COMP | TEAK | TEAK_003 | evergreenForest | 37.01296 | -119.01062 | 2201.1 | 2021-07-26 | M | 18.900000 | 2.2333333 | -24.40000 | 0.2600000 | 6.0766667 | 23.06667 | 5.884481 | 5.132917 |
| TEAK_025-M-20210726-COMP | TEAK | TEAK_025 | shrubScrub | 36.99000 | -119.02451 | 2305.0 | 2021-07-26 | M | 20.266667 | 1.6000000 | -24.73333 | 0.1833333 | 4.5000000 | 24.50000 | 5.211493 | 4.481673 |
| TEAK_004-M-20210726-COMP | TEAK | TEAK_004 | evergreenForest | 37.00169 | -119.03630 | 2190.5 | 2021-07-26 | M | 21.100000 | 2.1666667 | -24.30000 | 0.1466667 | 3.3033333 | 22.73333 | 5.601539 | 4.848597 |
| TEAK_004-O-20210726-COMP | TEAK | TEAK_004 | evergreenForest | 37.00169 | -119.03630 | 2190.5 | 2021-07-26 | O | 22.000000 | -1.5000000 | -26.40000 | 0.6400000 | 18.2700000 | NA | 5.020000 | 4.300000 |
| TEAK_005-M-20210728-COMP | TEAK | TEAK_005 | evergreenForest | 37.05823 | -118.98858 | 2727.2 | 2021-07-28 | M | 19.033333 | 1.7000000 | -23.20000 | 0.1200000 | 3.4266667 | 28.83333 | 5.388746 | 4.643448 |
| TEAK_005-O-20210728-COMP | TEAK | TEAK_005 | evergreenForest | 37.05823 | -118.98858 | 2727.2 | 2021-07-28 | O | 18.000000 | -1.7000000 | -26.50000 | 1.1200000 | 38.4600000 | NA | 5.550000 | 5.040000 |
| TOOL_041-O-20210803-COMP | TOOL | TOOL_041 | sedgeHerbaceous | 68.66667 | -149.36975 | 827.4 | 2021-08-03 | O | 8.400000 | NA | NA | NA | NA | NA | 5.381765 | 5.113456 |
| TOOL_043-O-20210803-COMP | TOOL | TOOL_043 | sedgeHerbaceous | 68.66551 | -149.37552 | 821.6 | 2021-08-03 | O | 8.133333 | NA | NA | NA | NA | NA | 4.983938 | 4.211318 |
| TOOL_042-O-20210803-COMP | TOOL | TOOL_042 | sedgeHerbaceous | 68.66407 | -149.38129 | 805.4 | 2021-08-03 | O | 3.466667 | NA | NA | NA | NA | NA | 5.339322 | 4.510074 |
| TOOL_044-O-20210803-COMP | TOOL | TOOL_044 | sedgeHerbaceous | 68.65816 | -149.36219 | 822.4 | 2021-08-03 | O | 3.800000 | NA | NA | NA | NA | NA | 5.216081 | 5.041234 |
| TOOL_006-O-20210804-COMP | TOOL | TOOL_006 | shrubScrub | 68.62213 | -149.28018 | 931.5 | 2021-08-04 | O | 4.766667 | NA | NA | NA | NA | NA | 5.823965 | 5.020627 |
| TOOL_002-O-20210804-COMP | TOOL | TOOL_002 | dwarfScrub | 68.62794 | -149.34723 | 843.8 | 2021-08-04 | O | 2.300000 | NA | NA | NA | NA | NA | 5.329583 | 5.416669 |
| TOOL_004-O-20210805-COMP | TOOL | TOOL_004 | dwarfScrub | 68.61677 | -149.62936 | 782.2 | 2021-08-05 | O | 4.366667 | NA | NA | NA | NA | NA | 4.874882 | 4.256399 |
| TOOL_003-O-20210805-COMP | TOOL | TOOL_003 | sedgeHerbaceous | 68.64025 | -149.64246 | 707.8 | 2021-08-05 | O | 7.833333 | NA | NA | NA | NA | NA | 6.174145 | 5.815095 |
| TOOL_005-O-20210806-COMP | TOOL | TOOL_005 | dwarfScrub | 68.56015 | -149.52853 | 834.7 | 2021-08-06 | O | 3.000000 | NA | NA | NA | NA | NA | 4.933520 | 4.886442 |
| BONA_009-O-20210707-COMP | BONA | BONA_009 | shrubScrub | 65.16919 | -147.52028 | 429.6 | 2021-07-07 | O | 6.333333 | NA | NA | NA | NA | NA | 4.532716 | 4.227224 |
| BONA_004-O-20210707-COMP | BONA | BONA_004 | evergreenForest | 65.19067 | -147.53669 | 668.9 | 2021-07-07 | O | 6.966667 | NA | NA | NA | NA | NA | 4.435072 | 4.077485 |
| BONA_006-O-20210707-COMP | BONA | BONA_006 | evergreenForest | 65.17611 | -147.54409 | 507.0 | 2021-07-07 | O | 10.233333 | NA | NA | NA | NA | NA | 4.116916 | 3.913903 |
| BONA_001-O-20210708-COMP | BONA | BONA_001 | deciduousForest | 65.17445 | -147.47815 | 374.1 | 2021-07-08 | O | 7.800000 | NA | NA | NA | NA | NA | 4.126778 | 4.002020 |
| HEAL_048-O-20210622-COMP | HEAL | HEAL_048 | dwarfScrub | 63.87509 | -149.21044 | 677.6 | 2021-06-22 | O | 3.066667 | 0.4333333 | -26.06667 | 1.2533333 | 43.3600000 | 36.13333 | 3.904213 | 3.610838 |
| HEAL_048-M-20210622-COMP | HEAL | HEAL_048 | dwarfScrub | 63.87509 | -149.21044 | 677.6 | 2021-06-23 | M | 3.300000 | 1.1500000 | -25.75000 | 1.4600000 | 39.5050000 | 27.75000 | 4.324382 | 3.708965 |
# Print the first demo table used in the join examples (columns: name, band).
band_members## # A tibble: 3 × 2
## name band
## <chr> <chr>
## 1 Mick Stones
## 2 John Beatles
## 3 Paul Beatles
# Print the second demo table (columns: name, plays).
band_instruments## # A tibble: 3 × 2
## name plays
## <chr> <chr>
## 1 John guitar
## 2 Paul bass
## 3 Keith guitar
# Left join: keep every row of band_members and add `plays` where `name`
# matches; unmatched members get NA.
left_join(band_members, band_instruments, by = "name")## # A tibble: 3 × 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
# Right join: keep every row of band_instruments and add `band` where `name`
# matches; unmatched players get NA.
right_join(band_members, band_instruments, by = "name")## # A tibble: 3 × 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
## 3 Keith <NA> guitar
# Inner join: keep only the names present in both tables.
inner_join(band_members, band_instruments, by = "name")## # A tibble: 2 × 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
# Full join: keep every name from either table, filling the gaps with NA.
full_join(band_members, band_instruments, by = "name")## # A tibble: 4 × 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
## 4 Keith <NA> guitar
# Left join on two key columns: rows match when both country and year agree.
left_join(table1, table3, by = c("country", "year"))## # A tibble: 6 × 5
## country year cases population rate
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 Afghanistan 1999 745 19987071 745/19987071
## 2 Afghanistan 2000 2666 20595360 2666/20595360
## 3 Brazil 1999 37737 172006362 37737/172006362
## 4 Brazil 2000 80488 174504898 80488/174504898
## 5 China 1999 212258 1272915272 212258/1272915272
## 6 China 2000 213766 1280428583 213766/1280428583
# Left join where the key has a different name in each table: `name` on the
# left is matched against `artist` on the right.
left_join(band_members, band_instruments2, by = c(name = "artist"))## # A tibble: 3 × 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
# Left join on country; the year columns exist in both tables, so they are
# told apart with the "_cases" / "_pop" suffixes.
left_join(table4a, table4b, by = "country", suffix = c("_cases", "_pop"))## # A tibble: 3 × 5
## country `1999_cases` `2000_cases` `1999_pop` `2000_pop`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan 745 2666 19987071 20595360
## 2 Brazil 37737 80488 172006362 174504898
## 3 China 212258 213766 1272915272 1280428583
Create some tables with just a few columns to work with
# NEON MAGs: keep Sample Name, Site ID, and GTDB-Tk Taxonomy Lineage.
small_MAGS <- select(
  NEON_MAGs,
  `Sample Name`, `Site ID`, `GTDB-Tk Taxonomy Lineage`
)
datatable(small_MAGS)

# NEON metagenomes: keep Sample Name, Site ID, and Ecosystem Subtype.
small_metagenomes <- select(
  NEON_metagenomes,
  `Sample Name`, `Site ID`, `Ecosystem Subtype`
)
datatable(small_metagenomes)

# NEON chemistry: keep genomicsSampleID, siteID, and nlcdClass.
small_chemistry <- select(NEON_chemistry, genomicsSampleID, siteID, nlcdClass)
datatable(small_chemistry)
Filter to contain just the data for your project site
# Site code for this project. It is defined once so the three filters below
# cannot drift apart; change it here to analyse a different site.
project_site <- "CLBJ"

# Restrict each small table to the project site.
filtered_small_MAGS <- small_MAGS %>%
  filter(`Site ID` == project_site)
filtered_small_metagenomes <- small_metagenomes %>%
  filter(`Site ID` == project_site)
filtered_small_chemistry <- small_chemistry %>%
  filter(siteID == project_site)
Do a left join of the NEON MAGs with NEON metagenomes by the sample name and show the resulting table.
# Add the metagenome columns to every MAG row with the same Sample Name and
# show the result as an interactive table.
datatable(
  left_join(NEON_MAGs, NEON_metagenomes, by = "Sample Name")
)
Using the data from your site do a left join of NEON chemistry with NEON metagenomes by Sample Name and genomicsSampleID columns and show the table.
# Site-level chemistry joined to site-level metagenomes; genomicsSampleID on
# the left is matched against Sample Name on the right.
datatable(
  left_join(
    filtered_small_chemistry,
    filtered_small_metagenomes,
    by = c("genomicsSampleID" = "Sample Name")
  )
)
Does it matter with these tables if you do a left, right, or full join?
# The same pair of tables joined three ways, to compare the results.

# Left join: keeps every chemistry row.
datatable(
  left_join(
    filtered_small_chemistry,
    filtered_small_metagenomes,
    by = c("genomicsSampleID" = "Sample Name")
  )
)

# Right join: keeps every metagenome row.
datatable(
  right_join(
    filtered_small_chemistry,
    filtered_small_metagenomes,
    by = c("genomicsSampleID" = "Sample Name")
  )
)

# Full join: keeps every row from both tables.
datatable(
  full_join(
    filtered_small_chemistry,
    filtered_small_metagenomes,
    by = c("genomicsSampleID" = "Sample Name")
  )
)
In this case it doesn’t matter because they all have the same data.
Do a left join of the NEON chemistry and NEON metagenomes by site ID and show the resulting table.
# Join chemistry to metagenomes on the site code only (siteID = Site ID) and
# show the result. A chemistry row can match several metagenome rows from the
# same site, which is what the recorded warning reports.
NEON_chemistry %>%
left_join(NEON_metagenomes, by = c("siteID" = "Site ID")) %>%
datatable()## Warning in left_join(., NEON_metagenomes, by = c(siteID = "Site ID")): Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 1 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
## warning.
Join the NEON MAG, metagenome, and chemistry dataframes into a single dataframe. What happens to the metagenome and chemistry information on the rows with the NEON coassembly?
# Step 1 of the three-way merge: full join of the MAG table with the chemistry
# table, matching Site ID against siteID.
# NOTE(review): the key is the site only, so a MAG row is paired with every
# chemistry row it matches at that site (see the recorded warning). Confirm
# this fan-out is intended rather than a per-sample match.
partial_merged <-NEON_MAGs %>%
full_join(NEON_chemistry, by = c("Site ID" = "siteID"))## Warning in full_join(., NEON_chemistry, by = c(`Site ID` = "siteID")): Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 1 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
## warning.
# Step 2 of the three-way merge: full join of the MAG + chemistry table with
# the metagenome table on Site ID. The plots that follow read columns such as
# `Sample Name.x` and `Site.x` from this result.
# NOTE(review): this is again a site-level key, so rows multiply a second time
# (see the recorded warning); confirm that is intended.
merged <- partial_merged %>%
full_join(NEON_metagenomes, by = "Site ID")## Warning in full_join(., NEON_metagenomes, by = "Site ID"): Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 1 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
## warning.
# The table display is commented out to keep the browser from crashing: the merged table has more than 63,000 rows.
#datatable(merged)
Filter the above table to contain data just for your project taxonomic group. Make a boxplot of soil temperatures for each sample at the sites.
# Boxplot of soil temperature per sample for the Actinobacteriota rows of the
# merged table, filled by site. fct_infreq() orders the samples by how often
# they occur in the filtered data.
merged %>%
  filter(Phylum == "Actinobacteriota") %>%
  ggplot(
    mapping = aes(x = fct_infreq(`Sample Name.x`), y = soilTemp, fill = Site.x)
  ) +
  geom_boxplot() +
  # Wrap long sample labels and use a denser set of y-axis breaks.
  scale_x_discrete(labels = wrap_format(50)) +
  scale_y_continuous(n.breaks = 12) +
  xlab("Sample") +
  ylab("Temperature (Celsius)") +
  labs(title = str_wrap("Soil Temperature of samples by site", width = 30)) +
  # All theme settings in one call: a boxed legend below the plot, plus axis
  # text sizes.
  theme(
    legend.position = "bottom",
    legend.justification = "left",
    legend.key.size = unit(0.4, "cm"),
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.title = element_text(colour = "black", size = 10, face = "bold"),
    legend.text = element_text(colour = "black", size = 10),
    legend.box.background = element_rect(),
    legend.box.margin = margin(14, 14, 14, 14),
    legend.box.just = "center",
    axis.text.x = element_text(size = 14, angle = 90),
    axis.line.y = element_line(linewidth = 0.25),
    axis.text.y = element_text(size = 20)
  ) +
  ggeasy::easy_center_title() ## Warning: Removed 730 rows containing non-finite values (`stat_boxplot()`).
Make a scatterplot of Ecosystem Subtype vs Temperature. Color by Order
# Scatterplot of soil temperature against land-cover class (nlcdClass) for the
# Actinobacteriota rows of the merged table, coloured by Order.
merged %>%
  filter(Phylum == "Actinobacteriota") %>%
  ggplot(mapping = aes(x = nlcdClass, y = soilTemp, color = Order)) +
  geom_point() +
  scale_y_continuous(n.breaks = 12) +
  xlab("Ecosystem") +
  ylab("Temperature (Celsius)") +
  # labs() takes labels only; it has no option for wrapping the title.
  labs(title = "Soil Temperature of Ecosystems by Order") +
  theme(
    axis.text.x = element_text(size = 14, angle = 90),
    axis.line.y = element_line(linewidth = 0.25),
    axis.text.y = element_text(size = 20)
  ) +
  ggeasy::easy_center_title() ## Warning: Removed 730 rows containing missing values (`geom_point()`).
Make a scatterplot of soilInCaClpH vs nlcdClass. Use Family as the color for points.
# Scatterplot of soil pH measured in CaCl (soilInCaClpH) against land-cover
# class (nlcdClass) for the Actinobacteriota rows of the merged table,
# coloured by Family. The axis label and title name the variable that is
# actually plotted.
merged %>%
  filter(Phylum == "Actinobacteriota") %>%
  ggplot(mapping = aes(x = nlcdClass, y = soilInCaClpH, color = Family)) +
  geom_point() +
  scale_y_continuous(n.breaks = 12) +
  xlab("Ecosystem") +
  ylab("Soil pH (CaCl2)") +
  labs(title = "Soil pH (CaCl2) of Ecosystems by Family") +
  theme(
    axis.text.x = element_text(size = 14, angle = 90),
    axis.line.y = element_line(linewidth = 0.25),
    axis.text.y = element_text(size = 20)
  ) +
  ggeasy::easy_center_title() ## Warning: Removed 253 rows containing missing values (`geom_point()`).
Here is a boxplot of the soil pH (measured in water) of the samples in which Actinobacteriota were found, filled by genus. From it we can see the pH range in which each genus was found.
# Boxplot of soil pH measured in water (soilInWaterpH) per sample for the
# Actinobacteriota rows of the merged table, filled by Genus. fct_infreq()
# orders the samples by how often they occur in the filtered data.
merged %>%
  filter(Phylum == "Actinobacteriota") %>%
  ggplot(
    mapping = aes(x = fct_infreq(`Sample Name.x`), y = soilInWaterpH, fill = Genus)
  ) +
  geom_boxplot() +
  # Wrap long sample labels and use a denser set of y-axis breaks.
  scale_x_discrete(labels = wrap_format(50)) +
  scale_y_continuous(n.breaks = 12) +
  xlab("Sample") +
  ylab("pH") +
  # labs() takes labels only; it has no option for wrapping the title.
  labs(title = "The pH Actinobacteria were found in by Genus") +
  # All theme settings in one call: a boxed legend below the plot, plus axis
  # text sizes.
  theme(
    legend.position = "bottom",
    legend.justification = "right",
    legend.key.size = unit(0.4, "cm"),
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(0.4, "cm"),
    legend.title = element_text(colour = "black", size = 10, face = "bold"),
    legend.text = element_text(colour = "black", size = 10),
    legend.box.background = element_rect(),
    legend.box.margin = margin(14, 14, 14, 14),
    legend.box.just = "center",
    axis.text.x = element_text(size = 20, angle = 90),
    axis.line.y = element_line(linewidth = 0.25),
    axis.text.y = element_text(size = 40)
  ) +
  ggeasy::easy_center_title() +
  ggeasy::easy_x_axis_title_size(size = 60) +
  ggeasy::easy_y_axis_title_size(size = 60)## Warning: Removed 253 rows containing non-finite values (`stat_boxplot()`).
Exercise 12
Here is a graph of Actinobacteria as it relates to nitrogen in the soil. With this we might discern which genera are important for fixing nitrogen at each site.
# Boxplot of soil nitrogen percentage (nitrogenPercent) per sample for the
# Actinobacteriota rows of the merged table, coloured by Genus. The title
# names the variable that is actually plotted.
merged %>%
  filter(Phylum == "Actinobacteriota") %>%
  ggplot(
    mapping = aes(x = `Sample Name.x`, y = nitrogenPercent, color = Genus)
  ) +
  geom_boxplot() +
  scale_y_continuous(n.breaks = 12) +
  xlab("Sample") +
  ylab("Percent Nitrogen") +
  labs(title = "Percent Nitrogen in Actinobacteriota samples by Genus") +
  theme(
    axis.text.x = element_text(size = 6, angle = 90),
    axis.line.y = element_line(linewidth = 0.25),
    axis.text.y = element_text(size = 20)
  ) +
  ggeasy::easy_center_title() +
  ggeasy::easy_adjust_legend(to = c("center")) +
  ggeasy::easy_change_legend(to = c("bottom"))## Warning: Removed 14674 rows containing non-finite values (`stat_boxplot()`).
# Boxplot of organicd13C per sample for the Actinobacteriota rows of the
# merged table, coloured by site.
merged %>%
  filter(Phylum == "Actinobacteriota") %>%
  ggplot(
    mapping = aes(x = `Sample Name.x`, y = organicd13C, color = Site.x)
  ) +
  geom_boxplot() +
  scale_y_continuous(n.breaks = 12) +
  xlab("Sample") +
  ylab("13C") +
  # labs() takes labels only; it has no option for wrapping the title.
  labs(title = "Carbon 13 isotope in Actinobacteriota samples") +
  theme(
    axis.text.x = element_text(size = 6, angle = 90),
    axis.line.y = element_line(linewidth = 0.25),
    axis.text.y = element_text(size = 20)
  ) +
  ggeasy::easy_center_title() +
  ggeasy::easy_adjust_legend(to = c("center")) +
  ggeasy::easy_change_legend(to = c("bottom"))## Warning: Removed 14674 rows containing non-finite values (`stat_boxplot()`).